Libraries & Setup

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggridges)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-8
library(readr)
library(breakaway)
library(rioja)
## This is rioja  1.0-7
# Here we have made our input into smaller 100MB files and now read them in
setwd("rawdata/MetagenomicFirstRound/")
con <- pipe("cat part.gz_* | gunzip -c", "rb")
data <- read_csv(con)
## Rows: 1132002 Columns: 187
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (4): sample, tax_name, tax_rank, tax_path
## dbl (182): tax_id, N_reads, N_alignments, MAP_damage, MAP_significance, mean...
## lgl   (1): MAP_valid
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
close(con)
setwd("../../")
#metadata
metadata <- read.csv("metadata.csv")

# Minimum amount of damage filter
DamMin = 0.0
D_Max = 1.0
#Lambda Likelihood Ratio
LR = 0
# Minimum reads for parsing taxa
MinRead = 1
# Minimum mean read length
MinLength = 35

Metadata and seq mapping observations

Here we are looking at some values produced by fastp.

First lets look at the relationship between duplication percentage and ∆Ct (difference between the ideal number fo PCR cycles and used number of cycles). The est age of samples in BP is shown.

plot(metadata$Dct,metadata$duplication.rate,pch=16,ylab="% duplication")
abline(v=0,col="darkred",lwd=3)
text(0.2,60,labels="Underamplified", adj=0)
text(-0.2,60,labels="Overamplified", adj=1)
text(metadata$Dct,metadata$duplication.rate+1.5,labels = metadata$age,cex=0.4)

Now lets look at the low complexity proportion vs ∆Ct

plot(metadata$Dct,metadata$low.complexity,pch=16,ylab="% low complexity")
abline(v=0,col="darkred",lwd=3)
text(0.2,0.32,labels="Underamplified", adj=0)
text(-0.2,0.32,labels="Overamplified", adj=1)

Now how many post-fastp reads there are per sample.

plot(metadata$Dct,metadata$dedupReads,pch=16,ylab="Post fastp reads")
abline(v=0,col="darkred",lwd=3)
text(0.2,2.2*10^8,labels="Underamplified", adj=0)
text(-0.2,2.2*10^8,labels="Overamplified", adj=1)
text(metadata$Dct,metadata$dedupReads+5*10^6,labels = metadata$age,cex=0.4)

Age-Damage / Length relationships

First we wrangle the raw data

# Let's make an animal subset of the data 
df1 <- data %>% filter(MAP_damage > DamMin, N_reads >= MinRead, mean_L > MinLength, MAP_significance  > LR,  grepl("Metazoa",tax_path), grepl("^genus", tax_rank), grepl("", sample))
inputdata <- df1
inputdata$sample2 <- factor(inputdata$sample,levels= sort(unique(inputdata$sample),decreasing = TRUE))
inputdata$age <- metadata$age[match(inputdata$sample2,metadata$Sample)]
inputdata$site <- as.factor(metadata$CoreID[match(inputdata$sample2,metadata$Sample)])

raw.metazoa <- inputdata[inputdata$N_reads>99,]
raw.metazoa.50 <- inputdata[inputdata$N_reads>49,]
raw.metazoa.1  <- inputdata

raw.metazoa.50.wide <- raw.metazoa.50 %>%
  pivot_wider(
    id_cols = tax_name,         # Rows will be `tax_name`
    names_from = sample,        # Columns will be `sample`
    values_from = N_reads,      # Observations will be `N_reads`
    values_fill = 0             # Fill missing values with 0
  )


raw.metazoa.1.wide <- raw.metazoa.1 %>%
  pivot_wider(
    id_cols = tax_name,         # Rows will be `tax_name`
    names_from = sample,        # Columns will be `sample`
    values_from = N_reads,      # Observations will be `N_reads`
    values_fill = 0             # Fill missing values with 0
  )


raw.metazoa.wide <- raw.metazoa %>%
  pivot_wider(
    id_cols = tax_name,         # Rows will be `tax_name`
    names_from = sample,        # Columns will be `sample`
    values_from = N_reads,      # Observations will be `N_reads`
    values_fill = 0             # Fill missing values with 0
  )



# plants
df1 <- data %>% filter(MAP_damage > DamMin, N_reads >= MinRead, mean_L > MinLength, MAP_significance  > LR,  grepl("Viridiplantae",tax_path), grepl("^genus", tax_rank), grepl("", sample))
inputdata <- df1
inputdata$sample2 <- factor(inputdata$sample,levels= sort(unique(inputdata$sample),decreasing = TRUE))
inputdata$age <- metadata$age[match(inputdata$sample2,metadata$Sample)]
inputdata$site <- as.factor(metadata$CoreID[match(inputdata$sample2,metadata$Sample)])

raw.plants <- inputdata[inputdata$N_reads>99,]
raw.plants.50 <- inputdata[inputdata$N_reads>49,]

raw.plants.50.wide <- raw.plants.50 %>%
  pivot_wider(
    id_cols = tax_name,         # Rows will be `tax_name`
    names_from = sample,        # Columns will be `sample`
    values_from = N_reads,      # Observations will be `N_reads`
    values_fill = 0             # Fill missing values with 0
  )


raw.plants.wide <- raw.plants %>%
  pivot_wider(
    id_cols = tax_name,         # Rows will be `tax_name`
    names_from = sample,        # Columns will be `sample`
    values_from = N_reads,      # Observations will be `N_reads`
    values_fill = 0             # Fill missing values with 0
  )


# bacteria 

df1 <- data %>% filter(MAP_damage > DamMin, N_reads >= MinRead, mean_L > MinLength, MAP_significance  > LR,  grepl("Bacteria",tax_path), grepl("^genus", tax_rank), grepl("", sample))
inputdata <- df1
inputdata$sample2 <- factor(inputdata$sample,levels= sort(unique(inputdata$sample),decreasing = TRUE))
inputdata$age <- metadata$age[match(inputdata$sample2,metadata$Sample)]
inputdata$site <- as.factor(metadata$CoreID[match(inputdata$sample2,metadata$Sample)])

raw.bacteria <- inputdata[inputdata$N_reads>99,]
raw.bacteria.50<- inputdata[inputdata$N_reads>49,]

raw.bacteria.50.wide <- raw.bacteria.50 %>%
  pivot_wider(
    id_cols = tax_name,         # Rows will be `tax_name`
    names_from = sample,        # Columns will be `sample`
    values_from = N_reads,      # Observations will be `N_reads`
    values_fill = 0             # Fill missing values with 0
  )


raw.bacteria.wide <- raw.bacteria %>%
  pivot_wider(
    id_cols = tax_name,         # Rows will be `tax_name`
    names_from = sample,        # Columns will be `sample`
    values_from = N_reads,      # Observations will be `N_reads`
    values_fill = 0             # Fill missing values with 0
  )

Now we make some plots, first the animals

ggplot(raw.metazoa, aes(x = MAP_damage, y = sample2, group = sample2)) +
  geom_density_ridges2(scale = 3, alpha = 0.55) +  # Plot density ridges first
  geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
  facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
  labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Metazoa MAP Damage")+
  geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of 0.00621
## Picking joint bandwidth of 0.0376
## Picking joint bandwidth of 0.0317
## Picking joint bandwidth of 0.042
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggplot(raw.metazoa, aes(x = mean_L, y = sample2, group = sample2)) +
  geom_density_ridges2(scale = 3, alpha = 0.55) +  # Plot density ridges first
  geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
  facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
  labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Metazoa Mean Length")+
  geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of 4.61
## Picking joint bandwidth of 3.38
## Picking joint bandwidth of 3.55
## Picking joint bandwidth of 3.35
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_text()`).

Now bacteria

ggplot(raw.bacteria, aes(x = MAP_damage, y = sample2, group = sample2)) +
  geom_density_ridges2(scale = 3, alpha = 0.55) +  # Plot density ridges first
  geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
  facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
  labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Bacteria MAP Damage")+
  geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of 0.0026
## Picking joint bandwidth of 0.00941
## Picking joint bandwidth of 0.0166
## Picking joint bandwidth of 0.0342
## Warning: Removed 33 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggplot(raw.bacteria, aes(x = mean_L, y = sample2, group = sample2)) +
  geom_density_ridges2(scale = 3, alpha = 0.55) +  # Plot density ridges first
  geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
  facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
  labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Bacteria Mean Length")+
  geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of 7.26
## Picking joint bandwidth of 2.18
## Picking joint bandwidth of 2.83
## Picking joint bandwidth of 3.31
## Warning: Removed 33 rows containing missing values or values outside the scale range
## (`geom_text()`).

Now we make some plots, first the animals

ggplot(raw.plants, aes(x = MAP_damage, y = sample2, group = sample2)) +
  geom_density_ridges2(scale = 3, alpha = 0.55) +  # Plot density ridges first
  geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
  facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
  labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Plants MAP Damage")+
  geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of NaN
## Picking joint bandwidth of 0.027
## Picking joint bandwidth of 0.0257
## Picking joint bandwidth of 0.0324
## Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggplot(raw.plants, aes(x = mean_L, y = sample2, group = sample2)) +
  geom_density_ridges2(scale = 3, alpha = 0.55) +  # Plot density ridges first
  geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
  facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
  labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Plants Mean Length")+
  geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of NaN
## Picking joint bandwidth of 4.2
## Picking joint bandwidth of 3.41
## Picking joint bandwidth of 4.55
## Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf
## Warning in FUN(X[[i]], ...): Removed 5 rows containing missing values or values outside the scale range
## (`geom_text()`).

Richness Estimates

Now let’s look at some richness estimates per core

richness <- raw.metazoa.50.wide %>%
  select(-tax_name) %>%
  summarise(across(everything(), ~ sum(. > 0))) %>%
  pivot_longer(cols = everything(), names_to = "Sample", values_to = "Richness")

# Combine richness with metadata
richness_plot_data <- richness %>%
  left_join(metadata, by = "Sample")


ggplot(richness_plot_data, aes(x = age, y = Richness, color = CoreID)) +
  geom_point(size = 4) +
  theme_minimal() +
  labs(
    title = "Metazoan Genus Richness vs. Age by Core (50 reads)",
    x = "Age",
    y = "Richness",
    color = "Core ID"
  ) +
  theme(
    plot.title = element_text(size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    legend.title = element_text(size = 12)
  )
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

## More reads in the earlier core portion - lets use breakaway estimates 

results <- breakaway_nof1(raw.metazoa.1.wide[,-1])
## Warning in breakaway_nof1.data.frame(., output, plot, answers, print): Assuming
## taxa are rows
named_estimates <- map_dbl(results, ~ .x$estimate)
names(named_estimates) <- names(results)  

diversity_df <- tibble(
  Sample = names(named_estimates),
  Richness = as.numeric(named_estimates)
)

# Merge with metadata
plot_data <- diversity_df %>%
  left_join(metadata, by = "Sample")

# Create the plot
ggplot(plot_data, aes(x = age, y = Richness, color = CoreID)) +
  geom_point(size = 4) +
  theme_minimal() +
  labs(
    title = "Metazoan Genus Richness (breakaway estimate) vs. Age by Core (50 reads)",
    x = "Age",
    y = "Richness",
    color = "Core ID"
  ) +
  theme(
    plot.title = element_text(size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    legend.title = element_text(size = 12)
  )
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Analyze and plot for raw.plants.50.wide
richness_plants <- raw.plants.50.wide %>%
  select(-tax_name) %>%
  summarise(across(everything(), ~ sum(. > 0))) %>%
  pivot_longer(cols = everything(), names_to = "Sample", values_to = "Richness")

richness_plot_data_plants <- richness_plants %>%
  left_join(metadata, by = "Sample")

ggplot(richness_plot_data_plants, aes(x = age, y = Richness, color = CoreID)) +
  geom_point(size = 4) +
  theme_minimal() +
  labs(
    title = "Plant Genus Richness vs. Age by Core (50 reads)",
    x = "Age",
    y = "Richness",
    color = "Core ID"
  ) +
  theme(
    plot.title = element_text(size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    legend.title = element_text(size = 12)
  )
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Analyze and plot for raw.bacteria.50.wide
richness_bacteria <- raw.bacteria.50.wide %>%
  select(-tax_name) %>%
  summarise(across(everything(), ~ sum(. > 0))) %>%
  pivot_longer(cols = everything(), names_to = "Sample", values_to = "Richness")

richness_plot_data_bacteria <- richness_bacteria %>%
  left_join(metadata, by = "Sample")

ggplot(richness_plot_data_bacteria, aes(x = age, y = Richness, color = CoreID)) +
  geom_point(size = 4) +
  theme_minimal() +
  labs(
    title = "Bacterial Genus Richness vs. Age by Core (50 reads)",
    x = "Age",
    y = "Richness",
    color = "Core ID"
  ) +
  theme(
    plot.title = element_text(size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    legend.title = element_text(size = 12)
  )
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

Beta Div

Now let’s examine the structure between samples

First the animals

raw.metazoa.50.wide.d <- as.data.frame(raw.metazoa.50.wide)
rownames(raw.metazoa.50.wide.d) <- raw.metazoa.50.wide.d[,1]
raw.metazoa.50.wide.d <- raw.metazoa.50.wide.d[,-c(1,2,3)]

par(mfrow = c(1, 2)) 
metaz.ord <- metaMDS(vegdist(t(raw.metazoa.50.wide.d),method = "jaccard",binary = TRUE),trymax = 200)
## Run 0 stress 0.09047535 
## Run 1 stress 0.09620039 
## Run 2 stress 0.1034781 
## Run 3 stress 0.09724904 
## Run 4 stress 0.09255168 
## Run 5 stress 0.1037211 
## Run 6 stress 0.09047535 
## ... New best solution
## ... Procrustes: rmse 6.723257e-05  max resid 0.0003028085 
## ... Similar to previous best
## Run 7 stress 0.08949237 
## ... New best solution
## ... Procrustes: rmse 0.04889984  max resid 0.2418224 
## Run 8 stress 0.09531488 
## Run 9 stress 0.1008613 
## Run 10 stress 0.09398269 
## Run 11 stress 0.1097467 
## Run 12 stress 0.102172 
## Run 13 stress 0.09637486 
## Run 14 stress 0.1060932 
## Run 15 stress 0.1026671 
## Run 16 stress 0.08949243 
## ... Procrustes: rmse 4.785826e-05  max resid 0.0001667614 
## ... Similar to previous best
## Run 17 stress 0.1019101 
## Run 18 stress 0.1039842 
## Run 19 stress 0.09263534 
## Run 20 stress 0.09365274 
## *** Best solution repeated 1 times
metaz.ord.b <- metaMDS(vegdist(t(raw.metazoa.50.wide.d),method = "bray"),trymax = 200)
## Run 0 stress 0.1159157 
## Run 1 stress 0.1380456 
## Run 2 stress 0.1189671 
## Run 3 stress 0.1294571 
## Run 4 stress 0.1345533 
## Run 5 stress 0.1364903 
## Run 6 stress 0.1221178 
## Run 7 stress 0.1333038 
## Run 8 stress 0.1234697 
## Run 9 stress 0.1135296 
## ... New best solution
## ... Procrustes: rmse 0.05435373  max resid 0.2265867 
## Run 10 stress 0.1440678 
## Run 11 stress 0.137368 
## Run 12 stress 0.1416223 
## Run 13 stress 0.1200431 
## Run 14 stress 0.133896 
## Run 15 stress 0.1333023 
## Run 16 stress 0.1194143 
## Run 17 stress 0.1258652 
## Run 18 stress 0.1365786 
## Run 19 stress 0.118108 
## Run 20 stress 0.1206481 
## Run 21 stress 0.1308329 
## Run 22 stress 0.1135296 
## ... New best solution
## ... Procrustes: rmse 4.465205e-05  max resid 0.0002628834 
## ... Similar to previous best
## *** Best solution repeated 1 times
plot(metaz.ord$points[,1],metaz.ord$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord$points),metadata$Sample)])),main="Jaccard 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord$points[,1],metaz.ord$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))

plot(metaz.ord.b$points[,1],metaz.ord.b$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])),main="Bray 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord.b$points[,1],metaz.ord.b$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord.b$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))

Now plants…

raw.plants.50.wide.d <- as.data.frame(raw.plants.50.wide)
rownames(raw.plants.50.wide.d) <- raw.plants.50.wide.d[,1]
raw.plants.50.wide.d <- raw.plants.50.wide.d[,-c(1,2,3)]

par(mfrow = c(1, 2)) 
metaz.ord <- metaMDS(vegdist(t(raw.plants.50.wide.d),method = "jaccard",binary = TRUE),trymax = 200)
## Run 0 stress 0.1541789 
## Run 1 stress 0.1592949 
## Run 2 stress 0.152327 
## ... New best solution
## ... Procrustes: rmse 0.09168251  max resid 0.4404266 
## Run 3 stress 0.151069 
## ... New best solution
## ... Procrustes: rmse 0.07541321  max resid 0.4521563 
## Run 4 stress 0.4003393 
## Run 5 stress 0.1523271 
## Run 6 stress 0.1726165 
## Run 7 stress 0.1626629 
## Run 8 stress 0.1793365 
## Run 9 stress 0.1510478 
## ... New best solution
## ... Procrustes: rmse 0.002177563  max resid 0.01019873 
## Run 10 stress 0.1781054 
## Run 11 stress 0.1523271 
## Run 12 stress 0.1592951 
## Run 13 stress 0.1741706 
## Run 14 stress 0.1789104 
## Run 15 stress 0.1635321 
## Run 16 stress 0.1626628 
## Run 17 stress 0.1626626 
## Run 18 stress 0.1587963 
## Run 19 stress 0.1592949 
## Run 20 stress 0.1535169 
## Run 21 stress 0.1523273 
## Run 22 stress 0.1674026 
## Run 23 stress 0.1810176 
## Run 24 stress 0.1510476 
## ... New best solution
## ... Procrustes: rmse 0.0006422852  max resid 0.00328771 
## ... Similar to previous best
## *** Best solution repeated 1 times
metaz.ord.b <- metaMDS(vegdist(t(raw.plants.50.wide.d),method = "bray"),trymax = 200)
## Run 0 stress 0.1206552 
## Run 1 stress 0.122504 
## Run 2 stress 0.1321074 
## Run 3 stress 0.1185575 
## ... New best solution
## ... Procrustes: rmse 0.08221816  max resid 0.3337819 
## Run 4 stress 0.1383644 
## Run 5 stress 0.1036039 
## ... New best solution
## ... Procrustes: rmse 0.04040765  max resid 0.2536644 
## Run 6 stress 0.122763 
## Run 7 stress 0.1230242 
## Run 8 stress 0.1384721 
## Run 9 stress 0.1299303 
## Run 10 stress 0.1184053 
## Run 11 stress 0.1253682 
## Run 12 stress 0.1035932 
## ... New best solution
## ... Procrustes: rmse 0.002858671  max resid 0.01366886 
## Run 13 stress 0.1299268 
## Run 14 stress 0.103512 
## ... New best solution
## ... Procrustes: rmse 0.0154484  max resid 0.07688617 
## Run 15 stress 0.1035118 
## ... New best solution
## ... Procrustes: rmse 0.00011513  max resid 0.0006656998 
## ... Similar to previous best
## Run 16 stress 0.1278738 
## Run 17 stress 0.1362423 
## Run 18 stress 0.1298699 
## Run 19 stress 0.1391909 
## Run 20 stress 0.1185575 
## *** Best solution repeated 1 times
plot(metaz.ord$points[,1],metaz.ord$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord$points),metadata$Sample)])),main="Jaccard 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord$points[,1],metaz.ord$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))

plot(metaz.ord.b$points[,1],metaz.ord.b$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])),main="Bray 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord.b$points[,1],metaz.ord.b$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord.b$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))

Finally the bacteria

raw.bacteria.50.wide.d <- as.data.frame(raw.bacteria.50.wide)
rownames(raw.bacteria.50.wide.d) <- raw.bacteria.50.wide.d[,1]
raw.bacteria.50.wide.d <- raw.bacteria.50.wide.d[,-c(1,2,3)]

par(mfrow = c(1, 2)) 
metaz.ord <- metaMDS(vegdist(t(raw.bacteria.50.wide.d),method = "jaccard",binary = TRUE),trymax = 200)
## Run 0 stress 0.07618421 
## Run 1 stress 0.0886093 
## Run 2 stress 0.08250816 
## Run 3 stress 0.07646896 
## ... Procrustes: rmse 0.02101119  max resid 0.09510886 
## Run 4 stress 0.08839692 
## Run 5 stress 0.08250791 
## Run 6 stress 0.08747634 
## Run 7 stress 0.08374903 
## Run 8 stress 0.06992665 
## ... New best solution
## ... Procrustes: rmse 0.02497143  max resid 0.1033095 
## Run 9 stress 0.08628233 
## Run 10 stress 0.07562228 
## Run 11 stress 0.06992426 
## ... New best solution
## ... Procrustes: rmse 0.0004373639  max resid 0.002003678 
## ... Similar to previous best
## Run 12 stress 0.07618411 
## Run 13 stress 0.07041131 
## ... Procrustes: rmse 0.01902059  max resid 0.08940651 
## Run 14 stress 0.08201651 
## Run 15 stress 0.07618438 
## Run 16 stress 0.08593946 
## Run 17 stress 0.0699244 
## ... Procrustes: rmse 0.0001687932  max resid 0.0009818997 
## ... Similar to previous best
## Run 18 stress 0.08144867 
## Run 19 stress 0.07031802 
## ... Procrustes: rmse 0.01196637  max resid 0.0733124 
## Run 20 stress 0.07032135 
## ... Procrustes: rmse 0.01412343  max resid 0.08656295 
## *** Best solution repeated 2 times
metaz.ord.b <- metaMDS(vegdist(t(raw.plants.50.wide.d),method = "bray"),trymax = 200)
## Run 0 stress 0.1206552 
## Run 1 stress 0.1253682 
## Run 2 stress 0.1259867 
## Run 3 stress 0.126094 
## Run 4 stress 0.1035118 
## ... New best solution
## ... Procrustes: rmse 0.0687823  max resid 0.2587032 
## Run 5 stress 0.1184052 
## Run 6 stress 0.1246365 
## Run 7 stress 0.1321073 
## Run 8 stress 0.1253755 
## Run 9 stress 0.1035118 
## ... New best solution
## ... Procrustes: rmse 2.284231e-05  max resid 6.63103e-05 
## ... Similar to previous best
## Run 10 stress 0.1227575 
## Run 11 stress 0.1206551 
## Run 12 stress 0.1245438 
## Run 13 stress 0.1184052 
## Run 14 stress 0.143572 
## Run 15 stress 0.1278739 
## Run 16 stress 0.1227576 
## Run 17 stress 0.137556 
## Run 18 stress 0.1279246 
## Run 19 stress 0.1227629 
## Run 20 stress 0.1036039 
## ... Procrustes: rmse 0.01508932  max resid 0.07600722 
## *** Best solution repeated 1 times
plot(metaz.ord$points[,1],metaz.ord$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord$points),metadata$Sample)])),main="Jaccard 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord$points[,1],metaz.ord$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))

plot(metaz.ord.b$points[,1],metaz.ord.b$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])),main="Bray 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord.b$points[,1],metaz.ord.b$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord.b$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))

Taxonomic Overview

Here we are showing taxonomic diversity for all samples across all cores, including only spp with high certainty (>100 reads), showing only 240 out of 322.

par(mfrow = c(1, 1)) 


raw.metazoa.wide.d <- as.data.frame(raw.metazoa.wide)
rownames(raw.metazoa.wide.d) <- raw.metazoa.wide.d[,1]
raw.metazoa.wide.d <- raw.metazoa.wide.d[,-1]
raw.metazoa.wide.d <- raw.metazoa.wide.d[order(rowSums(raw.metazoa.wide.d),decreasing = TRUE),]

strat.plot(t(raw.metazoa.wide.d)[,1:40],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.metazoa.wide.d)[,41:80],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.metazoa.wide.d)[,81:120],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.metazoa.wide.d)[,121:160],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.metazoa.wide.d)[,161:200],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.metazoa.wide.d)[,201:240],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

Let’s have a look at the plants, showing all 166.

raw.plants.wide.d <- as.data.frame(raw.plants.wide)
rownames(raw.plants.wide.d) <- raw.plants.wide.d[,1]
raw.plants.wide.d <- raw.plants.wide.d[,-1]
raw.plants.wide.d <- raw.plants.wide.d[order(rowSums(raw.plants.wide.d),decreasing = TRUE),]

strat.plot(t(raw.plants.wide.d)[,1:40],yvar = metadata$age[match(colnames(raw.plants.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.plants.wide.d)[,41:80],yvar = metadata$age[match(colnames(raw.plants.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.plants.wide.d)[,81:120],yvar = metadata$age[match(colnames(raw.plants.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.plants.wide.d)[,121:166],yvar = metadata$age[match(colnames(raw.plants.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

Finally the bacteria, showing only 360 out of 1049.

raw.bacteria.wide.d <- as.data.frame(raw.bacteria.wide)
rownames(raw.bacteria.wide.d) <- raw.bacteria.wide.d[,1]
raw.bacteria.wide.d <- raw.bacteria.wide.d[,-1]
raw.bacteria.wide.d <- raw.bacteria.wide.d[order(rowSums(raw.bacteria.wide.d),decreasing = TRUE),]

strat.plot(t(raw.bacteria.wide.d)[,1:40],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.bacteria.wide.d)[,41:80],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.bacteria.wide.d)[,81:120],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.bacteria.wide.d)[,121:160],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.bacteria.wide.d)[,161:200],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.bacteria.wide.d)[,201:240],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.bacteria.wide.d)[,241:280],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.bacteria.wide.d)[,281:320],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")

strat.plot(t(raw.bacteria.wide.d)[,321:360],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")